{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to DataFrames\n",
    "**[Bogumił Kamiński](http://bogumilkaminski.pl/about/), Apr 21, 2018**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "using DataFrames # load package"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshaping DataFrames"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Wide to long"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>M1</th><th>M2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>1</td><td>11</td><td>111</td></tr><tr><th>2</th><td>2</td><td>1</td><td>12</td><td>112</td></tr><tr><th>3</th><td>3</td><td>2</td><td>13</td><td>113</td></tr><tr><th>4</th><td>4</td><td>2</td><td>14</td><td>114</td></tr></tbody></table>"
      ],
      "text/plain": [
       "4×4 DataFrames.DataFrame\n",
       "│ Row │ id │ id2 │ M1 │ M2  │\n",
       "├─────┼────┼─────┼────┼─────┤\n",
       "│ 1   │ 1  │ 1   │ 11 │ 111 │\n",
       "│ 2   │ 2  │ 1   │ 12 │ 112 │\n",
       "│ 3   │ 3  │ 2   │ 13 │ 113 │\n",
       "│ 4   │ 4  │ 2   │ 14 │ 114 │"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id=[1,2,3,4], id2=[1,1,2,2], M1=[11,12,13,14], M2=[111,112,113,114])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>id</th></tr></thead><tbody><tr><th>1</th><td>M1</td><td>11</td><td>1</td></tr><tr><th>2</th><td>M1</td><td>12</td><td>2</td></tr><tr><th>3</th><td>M1</td><td>13</td><td>3</td></tr><tr><th>4</th><td>M1</td><td>14</td><td>4</td></tr><tr><th>5</th><td>M2</td><td>111</td><td>1</td></tr><tr><th>6</th><td>M2</td><td>112</td><td>2</td></tr><tr><th>7</th><td>M2</td><td>113</td><td>3</td></tr><tr><th>8</th><td>M2</td><td>114</td><td>4</td></tr></tbody></table>"
      ],
      "text/plain": [
       "8×3 DataFrames.DataFrame\n",
       "│ Row │ variable │ value │ id │\n",
       "├─────┼──────────┼───────┼────┤\n",
       "│ 1   │ M1       │ 11    │ 1  │\n",
       "│ 2   │ M1       │ 12    │ 2  │\n",
       "│ 3   │ M1       │ 13    │ 3  │\n",
       "│ 4   │ M1       │ 14    │ 4  │\n",
       "│ 5   │ M2       │ 111   │ 1  │\n",
       "│ 6   │ M2       │ 112   │ 2  │\n",
       "│ 7   │ M2       │ 113   │ 3  │\n",
       "│ 8   │ M2       │ 114   │ 4  │"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "melt(x, :id, [:M1, :M2]) # first pass id-variables and then measure variables; meltdf makes a view"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>key</th><th>observed</th><th>id</th></tr></thead><tbody><tr><th>1</th><td>M1</td><td>11</td><td>1</td></tr><tr><th>2</th><td>M1</td><td>12</td><td>2</td></tr><tr><th>3</th><td>M1</td><td>13</td><td>3</td></tr><tr><th>4</th><td>M1</td><td>14</td><td>4</td></tr><tr><th>5</th><td>M2</td><td>111</td><td>1</td></tr><tr><th>6</th><td>M2</td><td>112</td><td>2</td></tr><tr><th>7</th><td>M2</td><td>113</td><td>3</td></tr><tr><th>8</th><td>M2</td><td>114</td><td>4</td></tr></tbody></table>"
      ],
      "text/plain": [
       "8×3 DataFrames.DataFrame\n",
       "│ Row │ key │ observed │ id │\n",
       "├─────┼─────┼──────────┼────┤\n",
       "│ 1   │ M1  │ 11       │ 1  │\n",
       "│ 2   │ M1  │ 12       │ 2  │\n",
       "│ 3   │ M1  │ 13       │ 3  │\n",
       "│ 4   │ M1  │ 14       │ 4  │\n",
       "│ 5   │ M2  │ 111      │ 1  │\n",
       "│ 6   │ M2  │ 112      │ 2  │\n",
       "│ 7   │ M2  │ 113      │ 3  │\n",
       "│ 8   │ M2  │ 114      │ 4  │"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# optionally you can rename columns; melt and stack are identical but order of arguments is reversed\n",
    "stack(x, [:M1, :M2], :id, variable_name=:key, value_name=:observed) # first measures and then id-s; stackdf creates view"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>id</th><th>id2</th></tr></thead><tbody><tr><th>1</th><td>M1</td><td>11</td><td>1</td><td>1</td></tr><tr><th>2</th><td>M1</td><td>12</td><td>2</td><td>1</td></tr><tr><th>3</th><td>M1</td><td>13</td><td>3</td><td>2</td></tr><tr><th>4</th><td>M1</td><td>14</td><td>4</td><td>2</td></tr><tr><th>5</th><td>M2</td><td>111</td><td>1</td><td>1</td></tr><tr><th>6</th><td>M2</td><td>112</td><td>2</td><td>1</td></tr><tr><th>7</th><td>M2</td><td>113</td><td>3</td><td>2</td></tr><tr><th>8</th><td>M2</td><td>114</td><td>4</td><td>2</td></tr></tbody></table>"
      ],
      "text/plain": [
       "8×4 DataFrames.DataFrame\n",
       "│ Row │ variable │ value │ id │ id2 │\n",
       "├─────┼──────────┼───────┼────┼─────┤\n",
       "│ 1   │ M1       │ 11    │ 1  │ 1   │\n",
       "│ 2   │ M1       │ 12    │ 2  │ 1   │\n",
       "│ 3   │ M1       │ 13    │ 3  │ 2   │\n",
       "│ 4   │ M1       │ 14    │ 4  │ 2   │\n",
       "│ 5   │ M2       │ 111   │ 1  │ 1   │\n",
       "│ 6   │ M2       │ 112   │ 2  │ 1   │\n",
       "│ 7   │ M2       │ 113   │ 3  │ 2   │\n",
       "│ 8   │ M2       │ 114   │ 4  │ 2   │"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# if second argument is omitted in melt or stack , all other columns are assumed to be the second argument\n",
    "# but measure variables are selected only if they are <: AbstractFloat\n",
    "melt(x, [:id, :id2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>id</th><th>id2</th></tr></thead><tbody><tr><th>1</th><td>M1</td><td>11</td><td>1</td><td>1</td></tr><tr><th>2</th><td>M1</td><td>12</td><td>2</td><td>1</td></tr><tr><th>3</th><td>M1</td><td>13</td><td>3</td><td>2</td></tr><tr><th>4</th><td>M1</td><td>14</td><td>4</td><td>2</td></tr><tr><th>5</th><td>M2</td><td>111</td><td>1</td><td>1</td></tr><tr><th>6</th><td>M2</td><td>112</td><td>2</td><td>1</td></tr><tr><th>7</th><td>M2</td><td>113</td><td>3</td><td>2</td></tr><tr><th>8</th><td>M2</td><td>114</td><td>4</td><td>2</td></tr></tbody></table>"
      ],
      "text/plain": [
       "8×4 DataFrames.DataFrame\n",
       "│ Row │ variable │ value │ id │ id2 │\n",
       "├─────┼──────────┼───────┼────┼─────┤\n",
       "│ 1   │ M1       │ 11    │ 1  │ 1   │\n",
       "│ 2   │ M1       │ 12    │ 2  │ 1   │\n",
       "│ 3   │ M1       │ 13    │ 3  │ 2   │\n",
       "│ 4   │ M1       │ 14    │ 4  │ 2   │\n",
       "│ 5   │ M2       │ 111   │ 1  │ 1   │\n",
       "│ 6   │ M2       │ 112   │ 2  │ 1   │\n",
       "│ 7   │ M2       │ 113   │ 3  │ 2   │\n",
       "│ 8   │ M2       │ 114   │ 4  │ 2   │"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "melt(x, [1, 2]) # you can use index instead of symbol"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.288607 seconds (25.97 k allocations: 230.395 MiB, 31.10% gc time)\n",
      "  0.252100 seconds (127 allocations: 228.889 MiB, 51.72% gc time)\n",
      "  0.258912 seconds (102.99 k allocations: 5.480 MiB)\n",
      "  0.000139 seconds (85 allocations: 5.250 KiB)\n"
     ]
    }
   ],
   "source": [
    "bigx = DataFrame(rand(10^6, 10)) # a test comparing creation of new DataFrame and a view\n",
    "bigx[:id] = 1:10^6\n",
    "@time melt(bigx, :id)\n",
    "@time melt(bigx, :id)\n",
    "@time meltdf(bigx, :id)\n",
    "@time meltdf(bigx, :id);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>'a'</td><td>0.458145</td><td>0.142355</td></tr><tr><th>2</th><td>1</td><td>'b'</td><td>0.799502</td><td>0.631973</td></tr><tr><th>3</th><td>1</td><td>'c'</td><td>0.665952</td><td>0.204755</td></tr></tbody></table>"
      ],
      "text/plain": [
       "3×4 DataFrames.DataFrame\n",
       "│ Row │ id │ id2 │ a1       │ a2       │\n",
       "├─────┼────┼─────┼──────────┼──────────┤\n",
       "│ 1   │ 1  │ 'a' │ 0.458145 │ 0.142355 │\n",
       "│ 2   │ 1  │ 'b' │ 0.799502 │ 0.631973 │\n",
       "│ 3   │ 1  │ 'c' │ 0.665952 │ 0.204755 │"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>id</th><th>id2</th></tr></thead><tbody><tr><th>1</th><td>a1</td><td>0.458145</td><td>1</td><td>'a'</td></tr><tr><th>2</th><td>a1</td><td>0.799502</td><td>1</td><td>'b'</td></tr><tr><th>3</th><td>a1</td><td>0.665952</td><td>1</td><td>'c'</td></tr><tr><th>4</th><td>a2</td><td>0.142355</td><td>1</td><td>'a'</td></tr><tr><th>5</th><td>a2</td><td>0.631973</td><td>1</td><td>'b'</td></tr><tr><th>6</th><td>a2</td><td>0.204755</td><td>1</td><td>'c'</td></tr></tbody></table>"
      ],
      "text/plain": [
       "6×4 DataFrames.DataFrame\n",
       "│ Row │ variable │ value    │ id │ id2 │\n",
       "├─────┼──────────┼──────────┼────┼─────┤\n",
       "│ 1   │ a1       │ 0.458145 │ 1  │ 'a' │\n",
       "│ 2   │ a1       │ 0.799502 │ 1  │ 'b' │\n",
       "│ 3   │ a1       │ 0.665952 │ 1  │ 'c' │\n",
       "│ 4   │ a2       │ 0.142355 │ 1  │ 'a' │\n",
       "│ 5   │ a2       │ 0.631973 │ 1  │ 'b' │\n",
       "│ 6   │ a2       │ 0.204755 │ 1  │ 'c' │"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "melt(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th></tr></thead><tbody><tr><th>1</th><td>x1</td><td>0.284223</td></tr><tr><th>2</th><td>x1</td><td>0.498334</td></tr><tr><th>3</th><td>x1</td><td>0.84188</td></tr><tr><th>4</th><td>x2</td><td>0.845803</td></tr><tr><th>5</th><td>x2</td><td>0.722629</td></tr><tr><th>6</th><td>x2</td><td>0.995074</td></tr></tbody></table>"
      ],
      "text/plain": [
       "6×2 DataFrames.DataFrame\n",
       "│ Row │ variable │ value    │\n",
       "├─────┼──────────┼──────────┤\n",
       "│ 1   │ x1       │ 0.284223 │\n",
       "│ 2   │ x1       │ 0.498334 │\n",
       "│ 3   │ x1       │ 0.84188  │\n",
       "│ 4   │ x2       │ 0.845803 │\n",
       "│ 5   │ x2       │ 0.722629 │\n",
       "│ 6   │ x2       │ 0.995074 │"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "melt(DataFrame(rand(3,2))) # by default stack and melt treats floats as value columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>key</th></tr></thead><tbody><tr><th>1</th><td>x1</td><td>0.876846</td><td>1</td></tr><tr><th>2</th><td>x1</td><td>0.0686333</td><td>1</td></tr><tr><th>3</th><td>x1</td><td>0.0815149</td><td>1</td></tr><tr><th>4</th><td>x2</td><td>0.984013</td><td>1</td></tr><tr><th>5</th><td>x2</td><td>0.139363</td><td>1</td></tr><tr><th>6</th><td>x2</td><td>0.859373</td><td>1</td></tr></tbody></table>"
      ],
      "text/plain": [
       "6×3 DataFrames.DataFrame\n",
       "│ Row │ variable │ value     │ key │\n",
       "├─────┼──────────┼───────────┼─────┤\n",
       "│ 1   │ x1       │ 0.876846  │ 1   │\n",
       "│ 2   │ x1       │ 0.0686333 │ 1   │\n",
       "│ 3   │ x1       │ 0.0815149 │ 1   │\n",
       "│ 4   │ x2       │ 0.984013  │ 1   │\n",
       "│ 5   │ x2       │ 0.139363  │ 1   │\n",
       "│ 6   │ x2       │ 0.859373  │ 1   │"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = DataFrame(rand(3,2))\n",
    "df[:key] = [1,1,1]\n",
    "mdf = melt(df) # duplicates in key are silently accepted"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Long to wide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>'a'</td><td>0.754369</td><td>0.095943</td></tr><tr><th>2</th><td>1</td><td>'b'</td><td>0.494067</td><td>0.227082</td></tr><tr><th>3</th><td>1</td><td>'c'</td><td>0.622234</td><td>0.791082</td></tr></tbody></table>"
      ],
      "text/plain": [
       "3×4 DataFrames.DataFrame\n",
       "│ Row │ id │ id2 │ a1       │ a2       │\n",
       "├─────┼────┼─────┼──────────┼──────────┤\n",
       "│ 1   │ 1  │ 'a' │ 0.754369 │ 0.095943 │\n",
       "│ 2   │ 1  │ 'b' │ 0.494067 │ 0.227082 │\n",
       "│ 3   │ 1  │ 'c' │ 0.622234 │ 0.791082 │"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = DataFrame(id = [1,1,1], id2=['a','b','c'], a1 = rand(3), a2 = rand(3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>'a'</td><td>0.754369</td><td>0.095943</td></tr><tr><th>2</th><td>1</td><td>'b'</td><td>0.494067</td><td>0.227082</td></tr><tr><th>3</th><td>1</td><td>'c'</td><td>0.622234</td><td>0.791082</td></tr></tbody></table>"
      ],
      "text/plain": [
       "3×4 DataFrames.DataFrame\n",
       "│ Row │ id │ id2 │ a1       │ a2       │\n",
       "├─────┼────┼─────┼──────────┼──────────┤\n",
       "│ 1   │ 1  │ 'a' │ 0.754369 │ 0.095943 │\n",
       "│ 2   │ 1  │ 'b' │ 0.494067 │ 0.227082 │\n",
       "│ 3   │ 1  │ 'c' │ 0.622234 │ 0.791082 │"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th><th>id</th><th>id2</th></tr></thead><tbody><tr><th>1</th><td>a1</td><td>0.754369</td><td>1</td><td>'a'</td></tr><tr><th>2</th><td>a1</td><td>0.494067</td><td>1</td><td>'b'</td></tr><tr><th>3</th><td>a1</td><td>0.622234</td><td>1</td><td>'c'</td></tr><tr><th>4</th><td>a2</td><td>0.095943</td><td>1</td><td>'a'</td></tr><tr><th>5</th><td>a2</td><td>0.227082</td><td>1</td><td>'b'</td></tr><tr><th>6</th><td>a2</td><td>0.791082</td><td>1</td><td>'c'</td></tr></tbody></table>"
      ],
      "text/plain": [
       "6×4 DataFrames.DataFrame\n",
       "│ Row │ variable │ value    │ id │ id2 │\n",
       "├─────┼──────────┼──────────┼────┼─────┤\n",
       "│ 1   │ a1       │ 0.754369 │ 1  │ 'a' │\n",
       "│ 2   │ a1       │ 0.494067 │ 1  │ 'b' │\n",
       "│ 3   │ a1       │ 0.622234 │ 1  │ 'c' │\n",
       "│ 4   │ a2       │ 0.095943 │ 1  │ 'a' │\n",
       "│ 5   │ a2       │ 0.227082 │ 1  │ 'b' │\n",
       "│ 6   │ a2       │ 0.791082 │ 1  │ 'c' │"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "y = melt(x, [1,2])\n",
    "display(x)\n",
    "display(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id2</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>'a'</td><td>0.754369</td><td>0.095943</td></tr><tr><th>2</th><td>'b'</td><td>0.494067</td><td>0.227082</td></tr><tr><th>3</th><td>'c'</td><td>0.622234</td><td>0.791082</td></tr></tbody></table>"
      ],
      "text/plain": [
       "3×3 DataFrames.DataFrame\n",
       "│ Row │ id2 │ a1       │ a2       │\n",
       "├─────┼─────┼──────────┼──────────┤\n",
       "│ 1   │ 'a' │ 0.754369 │ 0.095943 │\n",
       "│ 2   │ 'b' │ 0.494067 │ 0.227082 │\n",
       "│ 3   │ 'c' │ 0.622234 │ 0.791082 │"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unstack(y, :id2, :variable, :value) # stndard unstack with a unique key"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>id2</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>'a'</td><td>0.754369</td><td>0.095943</td></tr><tr><th>2</th><td>1</td><td>'b'</td><td>0.494067</td><td>0.227082</td></tr><tr><th>3</th><td>1</td><td>'c'</td><td>0.622234</td><td>0.791082</td></tr></tbody></table>"
      ],
      "text/plain": [
       "3×4 DataFrames.DataFrame\n",
       "│ Row │ id │ id2 │ a1       │ a2       │\n",
       "├─────┼────┼─────┼──────────┼──────────┤\n",
       "│ 1   │ 1  │ 'a' │ 0.754369 │ 0.095943 │\n",
       "│ 2   │ 1  │ 'b' │ 0.494067 │ 0.227082 │\n",
       "│ 3   │ 1  │ 'c' │ 0.622234 │ 0.791082 │"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "unstack(y, :variable, :value) # all other columns are treated as keys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[1m\u001b[33mWarning: \u001b[39m\u001b[22m\u001b[33mDuplicate entries in unstack at row 2 for key 1 and variable a1.\u001b[39m\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>id</th><th>a1</th><th>a2</th></tr></thead><tbody><tr><th>1</th><td>1</td><td>0.622234</td><td>0.791082</td></tr></tbody></table>"
      ],
      "text/plain": [
       "1×3 DataFrames.DataFrame\n",
       "│ Row │ id │ a1       │ a2       │\n",
       "├─────┼────┼──────────┼──────────┤\n",
       "│ 1   │ 1  │ 0.622234 │ 0.791082 │"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# by default :id, :variable and :value names are assumed; in this case it produces duplicate keys\n",
    "unstack(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>variable</th><th>value</th></tr></thead><tbody><tr><th>1</th><td>x1</td><td>0.678752</td></tr><tr><th>2</th><td>x1</td><td>0.929795</td></tr><tr><th>3</th><td>x1</td><td>0.884891</td></tr><tr><th>4</th><td>x2</td><td>0.925154</td></tr><tr><th>5</th><td>x2</td><td>0.474071</td></tr><tr><th>6</th><td>x2</td><td>0.731631</td></tr></tbody></table>"
      ],
      "text/plain": [
       "6×2 DataFrames.DataFrame\n",
       "│ Row │ variable │ value    │\n",
       "├─────┼──────────┼──────────┤\n",
       "│ 1   │ x1       │ 0.678752 │\n",
       "│ 2   │ x1       │ 0.929795 │\n",
       "│ 3   │ x1       │ 0.884891 │\n",
       "│ 4   │ x2       │ 0.925154 │\n",
       "│ 5   │ x2       │ 0.474071 │\n",
       "│ 6   │ x2       │ 0.731631 │"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = stack(DataFrame(rand(3,2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "ename": "LoadError",
     "evalue": "\u001b[91mArgumentError: No key column found\u001b[39m",
     "output_type": "error",
     "traceback": [
      "\u001b[91mArgumentError: No key column found\u001b[39m",
      "",
      "Stacktrace:",
      " [1] \u001b[1munstack\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame, ::Array{Symbol,1}, ::Int64, ::Int64\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\reshape.jl:279\u001b[22m\u001b[22m",
      " [2] \u001b[1munstack\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame, ::Int64, ::Int64\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\reshape.jl:269\u001b[22m\u001b[22m",
      " [3] \u001b[1munstack\u001b[22m\u001b[22m\u001b[1m(\u001b[22m\u001b[22m::DataFrames.DataFrame, ::Symbol, ::Symbol\u001b[1m)\u001b[22m\u001b[22m at \u001b[1mD:\\Software\\JULIA_PKG\\v0.6\\DataFrames\\src\\abstractdataframe\\reshape.jl:265\u001b[22m\u001b[22m"
     ]
    }
   ],
   "source": [
    "unstack(df, :variable, :value) # unable to unstack when no key column is present"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Julia 0.6.2",
   "language": "julia",
   "name": "julia-0.6"
  },
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "0.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
